import datetime
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import seaborn_image as isns
import string

# Raw CoMNIST Latin-letter arrays: one flattened 28x28 image per row of the
# data file, with the matching class index at the same row of the label file.
comnist_data = np.loadtxt('./../datasets/latin_data.csv', delimiter=",", dtype="float32")
comnist_label = np.loadtxt('./../datasets/latin_label.csv', delimiter=",", dtype="float32")

# Display one randomly chosen letter with per-pixel statistics. The vertical
# flip suggests the raw rows are stored upside-down -- TODO confirm against
# the dataset's documentation.
sample_idx = np.random.randint(comnist_data.shape[0])
my_letter = np.flip(comnist_data[sample_idx].reshape(28, 28), 0)
my_label = string.ascii_uppercase[int(comnist_label[sample_idx])]
print(f"Displaying letter {my_label}")
isns.imgplot(my_letter, cmap="gray", describe=True)
Displaying letter C No. of Obs. : 784 Min. Value : 0.0 Max. Value : 1.0 Mean : 0.07629089057445526 Variance : 0.05440612882375717 Skewness : 3.117643901464725
<AxesSubplot:>
from torch.utils.data import Dataset
class CoMNISTDataset(Dataset):
    """CoMNIST Latin-letter dataset backed by two headerless CSV files.

    Each row of ``data_dir`` holds 784 pixel values (a flattened 28x28
    image); the row at the same index in ``labels_dir`` holds the numeric
    class id of that image.
    """

    def __init__(
        self,
        labels_dir: str,
        data_dir: str,
        transform=None,
        target_transform=None
    ):
        # BUG FIX: the CSVs have no header row (they are read elsewhere in
        # this file with np.loadtxt as pure float data), so header=None is
        # required. The pandas default (header='infer') silently consumed
        # the first sample as column names, dropping one image and turning
        # its pixel values into column labels.
        self.img_labels = pd.read_csv(labels_dir, header=None)
        self.img_features = pd.read_csv(data_dir, header=None)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        # One label row per image row.
        return len(self.img_labels)

    def __getitem__(self, idx: int):
        # Reshape the flat 784-value row back into a 28x28 float32 image.
        image = np.asarray(self.img_features.iloc[idx], dtype=np.float32).reshape(28, 28)
        label = self.img_labels.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
import torchvision.transforms.functional
# Preprocessing pipeline applied to every sample: resize to 64x64, flip the
# image upright, convert to a tensor and standardise with the dataset-wide
# mean/std constants.
comnist_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64,64)),
    transforms.functional.vflip,
    transforms.ToTensor(),
    transforms.Normalize(mean=0.0942,
                         std=0.2352)
])

comnnist_dataset = CoMNISTDataset(
    labels_dir='./../datasets/latin_label.csv',
    data_dir='./../datasets/latin_data.csv',
    transform=comnist_transform
)
TRAINING_PERCENT = 0.85

# 85/15 train/validation split sizes; validation gets the remainder so the
# two counts always sum to the dataset length.
dataset_size = len(comnnist_dataset)
training_samples = int(dataset_size * TRAINING_PERCENT)
validation_samples = dataset_size - training_samples
print(f'''
training_samples: {training_samples}
validation_samples: {validation_samples}
''')

# Sanity-check one transformed sample and its label.
isns.imshow(comnnist_dataset[100][0].squeeze(0))
print(comnnist_dataset[100][1])
training_samples: 10897 validation_samples: 1924 0
# Same sample via matplotlib; permute CHW -> HWC, which imshow expects.
image = comnnist_dataset[100][0]
image_hwc = image.permute(1, 2, 0)
plt.imshow(image_hwc)
<matplotlib.image.AxesImage at 0x263b047ea60>
imgs = torch.stack([img_t for img_t, _ in comnnist_dataset], dim=3)
imgs.shape
torch.Size([1, 64, 64, 12821])
imgs.view(1, -1).mean(dim=1)
imgs.view(1, -1).std(dim=1)
# Deterministic train/validation split (fixed seed) plus batched loaders.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(
    comnnist_dataset,
    [training_samples, validation_samples],
    generator=split_generator)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=256)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=256, shuffle=False)
# Plot the first 20 images of one training batch with their letter labels.
data_i = iter(train_loader)
# BUG FIX: Python 3 iterators have no .next() method (and DataLoader's
# iterator dropped it in recent PyTorch); use the builtin next() instead.
images, labels = next(data_i)
images = images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in range(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    ax.set_title(string.ascii_uppercase[int(labels[idx].item())])
# Shrink one sample back to 28x28, invert it, and print every pixel value on
# top of the rendered image.
img = np.squeeze(
    transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((28,28)),
        transforms.functional.invert,
        transforms.ToTensor()
    ])(comnnist_dataset[1][0])
)

fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')

width, height = img.shape
thresh = 0.5
for row in range(width):
    for col in range(height):
        pixel = float(img[row][col])
        val = round(pixel, 1) if pixel != 0 else 0
        # White text on dark pixels, black text on bright ones.
        ax.annotate(str(val), xy=(col, row),
                    horizontalalignment='center',
                    verticalalignment='center',
                    color='white' if pixel < thresh else 'black')
import torch.nn as nn
import torch.nn.functional as F
class AlexNet(nn.Module):
    """AlexNet adapted for single-channel letter images with 26 classes.

    The layer layout (and therefore the state_dict key names and the
    parameter-initialisation order) matches the original: three attributes
    ``features``, ``avgpool`` and ``classifier`` built in that order.
    """

    def __init__(self, num_classes: int = 26, dropout: float = 0.5) -> None:
        super().__init__()
        conv_stack = [
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        head_stack = [
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.features = nn.Sequential(*conv_stack)
        # Adaptive pooling fixes the classifier input at 256x6x6 regardless
        # of the spatial size produced by the convolutional stack.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(*head_stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class scores (logits) of shape (batch, num_classes)."""
        feats = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(feats, 1))
# Grab one transformed sample for a forward-pass smoke test.
img, _ = comnnist_dataset[0]
# NOTE(review): img_batch is never used -- the forward call below batches
# img again with its own unsqueeze(0).
img_batch = img.unsqueeze(0)
isns.imshow(img.permute(1,2,0))
<AxesSubplot:>
# Smoke test: a freshly initialised network should emit one logit per class
# for a single-image batch, i.e. shape (1, 26).
alexnet = AlexNet()
out = alexnet(img.unsqueeze(0))
out.shape
C:\Users\rkovalch\Documents\SoftServe\CondaEnvironments\MiniCondaTestEnv\lib\site-packages\torch\nn\functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at ..\c10/core/TensorImpl.h:1156.)
torch.Size([1, 26])
# Restore previously trained weights into a fresh model instance.
PATH = './../models/AlexNet_1122_11_02_28.pth'
alexnet = AlexNet()
# NOTE(review): torch.load restores tensors to the device they were saved
# on; consider map_location='cpu' if the checkpoint came from a GPU run.
alexnet.load_state_dict(torch.load(PATH))
<All keys matched successfully>
# Train with cross-entropy + Adam; after each epoch, measure validation
# loss/accuracy and stop early once either loss target is reached.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(alexnet.parameters(), lr=1e-3)
n_epochs = 200
for epoch in range(n_epochs):
    loss_train = 0.0
    loss_val = 0.0
    correct = 0
    total = 0
    # Dropout must be active while optimizing.
    alexnet.train()
    for images, labels in train_loader:
        outputs = alexnet(images)
        train_loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        loss_train += train_loss.item()
    # BUG FIX: the validation pass previously ran with the model still in
    # train mode, so Dropout distorted the reported loss and accuracy.
    alexnet.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = alexnet(images)
            val_loss = loss_fn(outputs, labels)
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())
            loss_val += val_loss.item()
    print(f'{datetime.datetime.now()} Epoch {epoch}, Training loss {loss_train / len(train_loader)}' +
          f', Validation loss {loss_val / len(val_loader)}, Accuracy {correct / total}')
    # Early stop on either validation or training loss threshold.
    if loss_val / len(val_loader) <= 0.3 or loss_train / len(train_loader) <= 0.15:
        break
sns.set_theme(style="darkgrid")
import string
# Switch off dropout for inference.
alexnet.eval()
# obtain one batch of test images
loader = torch.utils.data.DataLoader(val_set, batch_size=20, shuffle=True)
dataiter = iter(loader)
# BUG FIX: Python 3 iterators have no .next() method; use builtin next().
images, labels = next(dataiter)
# get sample outputs
output = alexnet(images)
# convert output probabilities to predicted class
_, preds = torch.max(output, 1)
# prep images for display
images = images.numpy()
# plot the images in the batch, along with predicted and true labels
# (prediction first, true label in parentheses; green = correct)
fig = plt.figure(figsize=(25, 4))
for idx in range(20):
    ax = fig.add_subplot(2, 10, idx + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(images[idx]), cmap='gray')
    ax.set_title(
        "{} ({})".format(string.ascii_uppercase[int(preds[idx].item())],string.ascii_uppercase[int(labels[idx].item())]),
        color=("green" if preds[idx] == labels[idx] else "red"))
Using the UMAP implementation from the umap-learn library (scikit-learn compatible API)
import plotly.express as px
import matplotlib.pyplot as plt
from umap import UMAP
def chart(X, y):
"""Render an interactive 3-D scatter of the reduced features X, coloured by letter.

X must have exactly three columns (hard-coded as x/y/z below); y holds one
numeric class index per row, mapped to A-Z for the legend.
"""
# Append the labels as a fourth column so a single DataFrame drives the plot.
arr_concat=np.concatenate(
(X, y.reshape(y.shape[0],1)),
axis=1
)
df=pd.DataFrame(
arr_concat,
columns=['x', 'y', 'z', 'label']
)
df['label'] = df['label'].astype(int)
# Sort by class so legend entries come out in alphabetical order.
df.sort_values(
by='label',
axis=0,
ascending=True,
inplace=True
)
fig = px.scatter_3d(
df,
x='x',
y='y',
z='z',
# Colour by letter name rather than the raw class index.
color=df['label'].apply(lambda x: string.ascii_uppercase[int(x)]),
height=900,
width=950
)
# Fixed camera and a horizontal legend below the scene.
fig.update_layout(
title_text='UMAP',
showlegend=True,
legend=dict(orientation="h", yanchor="top", y=0, xanchor="center", x=0.5),
scene_camera=dict(
up=dict(x=0, y=0, z=1),
center=dict(x=0, y=0, z=-0.1),
eye=dict(x=1.5, y=-1.4, z=0.5)
),
margin=dict(l=0, r=0, b=0, t=0),
scene=dict(
xaxis=dict(
backgroundcolor='white',
color='black',
gridcolor='#f0f0f0',
title_font=dict(size=10),
tickfont=dict(size=10),
),
yaxis=dict(backgroundcolor='white',
color='black',
gridcolor='#f0f0f0',
title_font=dict(size=10),
tickfont=dict(size=10),
),
zaxis=dict(backgroundcolor='lightgrey',
color='black',
gridcolor='#f0f0f0',
title_font=dict(size=10),
tickfont=dict(size=10),
)
)
)
# Small outlined markers keep dense clusters readable.
fig.update_traces(
marker=dict(
size=3,
line=dict(color='black', width=0.1)
)
)
fig.show()
import pickle
# Load a previously fitted UMAP reducer from disk.
# NOTE(review): this object is immediately overwritten by the fresh UMAP
# constructed just below, so the unpickled reducer is never used. Also,
# unpickling is only safe for trusted files.
with open('C:/Users/rkovalch/Documents/SoftServe/DSProjects/MAI/Course/PyTorch-LatinLetters-Dimensionality-Reduction/models/umap/UMAP_12_10_16_58_15', 'rb') as pickle_file:
reducer = pickle.load(pickle_file)
# Fresh UMAP projecting to 3 components for the 3-D chart; the fixed
# random_state makes the embedding reproducible (at the cost of parallelism).
reducer = UMAP(
n_neighbors=50,  # larger neighbourhoods favour global structure
n_components=3,  # 3-D output to match chart()'s x/y/z columns
n_epochs=1000,
min_dist=0.5,
local_connectivity=5,
random_state=42
)
# Pull the entire train/validation splits as single batches so UMAP sees all
# samples at once.
reduced_train_loader = torch.utils.data.DataLoader(train_set, batch_size=len(train_set), shuffle=True)
reduced_val_loader = torch.utils.data.DataLoader(val_set, batch_size=len(val_set), shuffle=False)
# BUG FIX: Python 3 iterators have no .next() method; use builtin next().
images, labels = next(iter(reduced_train_loader))
batch_size = images.shape[0]
# Flatten each image to one feature row for the reducer.
images_a = images.reshape(batch_size, -1).numpy()
labels_a = labels.numpy()
# Supervised fit: passing the labels lets UMAP use them to shape the embedding.
X_train_res = reducer.fit_transform(images_a, labels_a)
chart(X_train_res, labels)
print(f'''Shape of X_train_res: {X_train_res.shape}''')
# Project the held-out validation split with the already-fitted reducer.
images, labels = next(iter(reduced_val_loader))
batch_size = images.shape[0]
images_a = images.reshape(batch_size, -1).numpy()
X_test_res = reducer.transform(images_a)
chart(X_test_res, labels)
import numpy as np
import pandas as pd
import string
# Chart a previously computed (already reduced) embedding saved to disk,
# paired with the original label file.
X = np.load('./../datasets/reduced/latin_data_12_10_16_58_15.npy')
y = np.loadtxt('./../datasets/latin_label.csv')
chart(X, y)